#encoding=utf8
#@Author: Zhangsida
#@Email: hmjhmj55@163.com
#@Time: 20-6-28 下午5:46

"""
其中1.txt是训练语料
2.txt是目标语句库
"""
import os,pickle,time
def load_sentence_keywords(path):
    """Build a mapping from sentence to its space-joined keyword string.

    Each line of *path* is split on ``'#'``. The last field (stripped) is
    taken as the sentence and all preceding fields, joined with single
    spaces, form the keyword string. Lines that yield an empty keyword
    string (e.g. lines containing no ``'#'``) are reported and skipped.
    If the same sentence appears more than once, the last occurrence wins.

    Args:
        path: Path of the UTF-8 text file to read.

    Returns:
        A dict mapping sentence -> keyword string.
    """
    sentence_to_keywords = {}
    # Explicit UTF-8: the platform default encoding is not reliable for
    # non-ASCII text.
    with open(path, "r", encoding="utf-8") as f:
        for ix, line in enumerate(f):
            splits_by_pound = line.split('#')
            keywords_str = ' '.join(splits_by_pound[:-1])
            sentence = splits_by_pound[-1].strip()
            if keywords_str:
                sentence_to_keywords[sentence] = keywords_str
            else:
                print("keyWordStr is empty:", line)
            print(ix, splits_by_pound, keywords_str, sentence)
    return sentence_to_keywords


def load_corpus_lines(path):
    """Read the corpus file and return its non-empty, normalised lines.

    Every ``'#'`` in a line is replaced by a space (split on ``'#'`` and
    re-join with ``' '``), then surrounding whitespace is stripped. Lines
    that are empty after stripping are reported and left out.

    Args:
        path: Path of the UTF-8 text file to read.

    Returns:
        A list of cleaned lines, in file order.
    """
    lines = []
    with open(path, "r", encoding="utf-8") as f:
        for ix, line in enumerate(f):
            splits_by_pound = line.split('#')
            keywords_str = ' '.join(splits_by_pound)
            sentence = keywords_str.strip()
            # Test the stripped text: the raw joined line still carries its
            # trailing newline, so it is never falsy.
            if sentence:
                lines.append(sentence)
            else:
                print("keyWordStr is empty:", line)
            print(ix, splits_by_pound, keywords_str, sentence)
    return lines


def write_corpus(lines, goal_path):
    """Write *lines* to *goal_path*, one per line, creating parent dirs.

    Args:
        lines: Iterable of strings without trailing newlines.
        goal_path: Destination file path; overwritten if it exists.
    """
    parent = os.path.dirname(goal_path)
    if parent:
        # open(..., 'w') creates the file itself but not its directory.
        os.makedirs(parent, exist_ok=True)
    with open(goal_path, 'w', encoding="utf-8") as f:
        f.writelines(line + "\n" for line in lines)


def main():
    """Run the preprocessing: pickle the sentence->keywords dict built from
    ``2.txt`` and write the cleaned ``1.txt`` corpus to ``GloVe/text8``."""
    start_time = time.time()

    sentence_to_keywords = load_sentence_keywords("2.txt")
    with open('dictSentence2KeyWordsStr.pkl', "wb") as f:
        pickle.dump(sentence_to_keywords, f)
    print(len(sentence_to_keywords))

    lines = load_corpus_lines("1.txt")
    write_corpus(lines, os.path.join("GloVe", "text8"))

    end_time = time.time()
    print("过程耗时：", end_time - start_time, "秒")


if __name__ == "__main__":
    main()
